module dataframe.variant;
import std.file;
import dataframe.common;
import dataframe.csv;
import std.conv;
import std.csv;
import std.datetime;
import std.exception;
import std.range:array, stride,only;
import std.stdio;
import std.variant;
import std.string:isNumeric;
import std.typecons:tuple,Tuple;

/**
 * A table made of one index column plus a rectangular block of cells.
 *
 * Column 0 always refers to the index column (`indexValues`); columns
 * 1 .. numCols-1 refer to the data cells. Cells are stored row-major in the
 * flat array `cellValues`, with `columnTypes.length` cells per row.
 *
 * Note: the fluent `setXxx` methods return `DataFrame` by value, i.e. a copy
 * of the struct. Array contents are shared between the copy and the original,
 * but later changes to array lengths or scalar fields made on the returned
 * copy are not reflected in the original variable.
 */
struct DataFrame
{
    /// Title printed in the header line of `toString`.
    string title;
    /// Heading of the index column.
    string indexTitle;
    /// Declared type of the index column.
    KalType indexType;
    /// Headings of the data columns (excluding the index column).
    string[] columnTitles;
    /// Declared types of the data columns; its length defines the cell row width.
    KalType[] columnTypes;
    /// One index value per row.
    KalVariant[] indexValues;
    /// Row-major cell storage, `columnTypes.length` entries per row.
    KalVariant[] cellValues;
    // The three settings below are only written by setters in this file;
    // presumably they are consumed by the CSV loader -- TODO confirm.
    bool setSkipHeader=false;
    char separator=',';
    char quote='\"';

    /*
    auto asPriceBars(PriceBarType type, int dp)(KalDate date)
    {
        PriceBar!(type,dp)[string] ret;
        ret.length=numRows;
        foreach(i;0..numRows)
        {
            auto bar = new PriceBar!(type,dp);
            bar.date=date;
            bar.open=this[i,1].to!double;
            bar.high=this[i,2].to!double;
            bar.low=this[i,3].to!double;
            bar.close=this[i,4].to!double;
            bar.volume=this[i,5].to!double;
            bar.openInterest=this[i,6].to!double;
            ret[this[i,0].to!string]=bar;
        }
        return ret;
    }
    */

    /// Offset into `cellValues` of the cell at (row, col), where col >= 1.
    /// The stride is the number of data columns, matching the `rows*cols`
    /// allocation done by `setCellDimensions`.
    private size_t cellOffset(size_t row, size_t col)
    {
        return row*columnTypes.length + (col-1);
    }

    /// Throws if (row, col) does not address an existing index or cell slot.
    private void enforceInBounds(size_t row, size_t col)
    {
        // size_t is unsigned, so only the upper bounds need checking.
        enforce(row < indexValues.length && col < numCols,
            text("DataFrame index [", row, ",", col, "] out of range for ",
                 indexValues.length, " rows x ", numCols, " columns"));
    }

    /// Resizes all storage for `rows` rows and `cols` data columns
    /// (the index column is not counted in `cols`).
    DataFrame setCellDimensions(size_t rows, size_t cols)
    {
        this.columnTitles.length=cols;
        this.columnTypes.length=cols;
        this.indexValues.length=rows;
        this.cellValues.length=rows*cols;
        return this;
    }

    /// Resizes the column title/type arrays to `cols` data columns.
    /// Does not resize `cellValues`.
    DataFrame setCellColumns(size_t cols)
    {
        this.columnTitles.length=cols;
        this.columnTypes.length=cols;
        return this;
    }

    /// Sets the frame title.
    DataFrame setTitle(string title)
    {
        this.title=title;
        return this;
    }

    /// Sets the heading of the index column.
    DataFrame setIndexTitle(string indexTitle)
    {
        this.indexTitle=indexTitle;
        return this;
    }

    /// Sets the declared type of the index column.
    DataFrame setIndexType(KalType type)
    {
        this.indexType=type;
        return this;
    }

    /// Replaces the data-column headings. Does not touch `columnTypes`,
    /// so `numCols` is unaffected.
    DataFrame setColumnTitles(string[] titles)
    {
        this.columnTitles=titles;
        return this;
    }

    /// Replaces the data-column types and resizes `columnTitles` to match.
    DataFrame setColumnTypes(KalType[] columnTypes)
    {
        this.columnTypes=columnTypes;
        this.columnTitles.length=columnTypes.length;
        return this;
    }

    /// Stores each element of `indexValues` into the index column.
    /// The frame must already have at least `indexValues.length` rows.
    DataFrame setIndexValues(T)(T[] indexValues)
    {
        // The parameter shadows the member of the same name; go through
        // opIndexAssign so each *element* (not the whole array) is converted
        // and stored in column 0.
        foreach(i,value;indexValues)
            this[i,0]=value;
        return this;
    }

    /// Stores a rows-by-columns block into the data cells
    /// (`cellValues[i][j]` goes to column j+1 of row i).
    DataFrame setCellValues(KalVariant[][] cellValues)
    {
        foreach(i,row;cellValues)
        {
            foreach(j,cell;row)
            {
                this[i,j+1]=cell;
            }
        }
        return this;
    }

    /// Stores a block whose first element of each row is the index value and
    /// whose remaining elements are that row's data cells.
    DataFrame setAllValues(KalVariant[][] values)
    {
        foreach(i,row;values)
        {
            this.indexValues[i]=values[i][0];
            foreach(j,cell;row[1..$])
            {
                this[i,j+1]=cell;
            }
        }
        return this;
    }

    /// Reads the file at path `csv` and passes its contents to `loadCSV`
    /// (declared outside this file).
    DataFrame loadCSVFile(string csv, bool hasHeader=false)
    {
        auto file=std.file.read(csv);
        return loadCSV(cast(string) file,hasHeader);
    }

    /// Sets the `setSkipHeader` flag.
    DataFrame setSkipFirstRow()
    {
        this.setSkipHeader=true;
        return this;
    }

    /// Clears the `setSkipHeader` flag.
    DataFrame setNoSkipFirstRow()
    {
        this.setSkipHeader=false;
        return this;
    }

    /// Sets the field separator character.
    DataFrame setSeparator(char separator)
    {
        this.separator=separator;
        return this;
    }

    /// Sets the quote character. (The parameter is named `separator` for
    /// historical reasons; it is stored in `quote`.)
    DataFrame setQuote(char separator)
    {
        this.quote=separator;
        return this;
    }

    /// Number of addressable columns: the data columns plus the index column.
    size_t numCols()
    {
        return columnTypes.length+1;
    }

    /// Number of rows (length of the index column).
    size_t length()
    {
        return indexValues.length;
    }
    alias numRows=length;

    /// Returns the value at (row, col); col 0 is the index column.
    /// Throws: Exception (via `enforce`) if the position is out of range.
    KalVariant opIndex(size_t row, size_t col)
    {
        enforceInBounds(row,col);
        if(col==0)
            return indexValues[row];
        else
            return cellValues[cellOffset(row,col)];
    }

    /// Returns the sub-table selected by the given row and column indices,
    /// as `ret[i][j] == this[rows[i], cols[j]]`.
    auto opIndex(size_t[] rows, size_t[] cols)
    {
        auto ret=new KalVariant[][](rows.length,cols.length);
        foreach(i,row;rows)
        {
            foreach(j,col;cols)
            {
                // Delegate to the scalar accessor so bounds checking and the
                // storage layout live in one place.
                ret[i][j]=this[row,col];
            }
        }
        return ret;
    }

    /// Converts `value` to `KalVariant` and stores it at (row, col);
    /// col 0 is the index column. Returns the stored value.
    /// Throws: Exception (via `enforce`) if the position is out of range.
    KalVariant opIndexAssign(T)(T value, size_t row, size_t col)
    {
        // TODO: enforce type safety for columns
        enforceInBounds(row,col);
        auto val=value.to!KalVariant;
        if (col==0)
            indexValues[row]=val;
        else
            cellValues[cellOffset(row,col)]=val;
        return val;
    }

    /// Returns a new array holding every row's value for column `col`.
    auto columnValues(size_t col)
    {
        auto ret=new KalVariant[](numRows);
        foreach(i;0..numRows)
            ret[i]=this[i,col];
        return ret;
    }

    /// Classifies column `col` by probing its values with the `isDate`,
    /// `isDouble` and `isInteger` helpers (declared outside this file).
    /// Date-like data is checked first, then numeric; anything else is
    /// reported as `ColumnType.String`.
    ColumnType columnType(size_t col)
    {
        auto data=columnValues(col);
        if (data.isDate!DateTime)
        {
            if (data.isDate!Date)
                return ColumnType.Date;
            return ColumnType.DateTime;
        }
        else if (data.isDouble)
        {
            // Try the integer types before falling back to Double.
            if (data.isInteger!int)
                return ColumnType.Int;
            if (data.isInteger!long)
                return ColumnType.Long;
            return ColumnType.Double;
        }
        return ColumnType.String;
    }

    /// Returns `columnType` for every column, index column first.
    ColumnType[] findColumnTypes()
    {
        ColumnType[] ret;
        foreach(i;0..numCols)
            ret~=columnType(i);
        return ret;
    }

    /// Slice operator for dimension `i` (0 = rows, 1 = columns): expands
    /// `start..end` into the explicit index list consumed by
    /// `opIndex(size_t[], size_t[])`.
    size_t[] opSlice(size_t i)(size_t start, size_t end)
        if ((i==0)||(i==1))
    {
        import std.range : iota;
        // iota yields a lazy range; materialise it to match the return type.
        return iota(start,end).array;
    }

    /// `$` inside an index expression: row count for dimension 0,
    /// column count for dimension 1.
    size_t opDollar(size_t i)()
    {
        static if (i==0)
            return numRows;
        else static if(i==1)
            return numCols;
        else static assert(0);
    }

    /// Renders the frame as tab-separated text: a title line, a blank line,
    /// a heading row, then one line per data row (each cell followed by a tab).
    string toString()
    {
        string ret="Kaleidic Dataframe: "~this.title~"\n\n";

        ret~=this.indexTitle;
        foreach(j;1..numCols)
            ret~="\t"~this.columnTitles[j-1];
        ret~="\n";
        //log("numRows="~numRows.to!string);
        //log("numCols="~numCols.to!string);

        foreach(i;0..numRows)
        {
            //log("row: "~i.to!string~": "~this.indexValues[i].to!string);
            foreach(j;0..numCols)
                ret~=this[i,j].to!string~"\t";
            ret~="\n";
        }
        return ret;
    }
}